library(pbapply)
library(EnsDb.Hsapiens.v79)
## Loading required package: ensembldb
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which, which.max, which.min
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
##
## expand.grid
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: GenomicFeatures
## Loading required package: AnnotationDbi
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: AnnotationFilter
##
## Attaching package: 'ensembldb'
## The following object is masked from 'package:stats':
##
## filter
dropout_percent <- c(0,5,10,20,30,40,50,75,80,85,90,95)
data_in <- sapply(dropout_percent, function(y) paste0("/Users/elise/Desktop/GitHub/Hubness_sc/bulkRNAseq/simul",y,".csv"))
data_out <- sapply(dropout_percent, function(y) paste0("/Users/elise/Desktop/GitHub/Hubness_sc/bulkRNAseq/simul_pca_readyforhubness",y,".csv"))
library(circlize)
## ========================================
## circlize version 0.4.9
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: https://jokergoo.github.io/circlize_book/book/
##
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization
## in R. Bioinformatics 2014.
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(circlize))
## ========================================
library(ComplexHeatmap)
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.4.2
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
##
## If you use it in published research, please cite:
## Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional
## genomic data. Bioinformatics 2016.
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
library(ggplot2)
zero_percent <- function(data) { # gene x cell matrix
return(sum(data==0)/ncol(data)/nrow(data)*100)
}
data_prep <- function(data) {
data <- log10(data+1)
return(data)
}
dist_heatmap <- function(dist) {
col_fun = colorRamp2(c(0,0.5,1),heat.colors(n=3))
Heatmap(as.matrix(dist), cluster_rows = T, cluster_columns = T, col = col_fun,
border = T, show_column_names = F, show_row_names = F, name = "Euclidean distance")
}
max_min_ratio <- function(dist) {
tmp <- as.matrix(dist)
diag(tmp) <- NA
ratios <- apply(tmp, 1, function(x) max(x, na.rm = T)/min(x, na.rm = T))
return(ratios)
}
relative_contrast <- function(dist) {
tmp <- as.matrix(dist)
diag(tmp) <- NA
rc <- apply(tmp, 1, function(x) abs(max(x, na.rm = T)-min(x, na.rm = T))/min(x, na.rm = T))
return(rc)
}
data <- pblapply(X=data_in, FUN=function(x) read.table( file=x))
data <- pblapply(data, function(x) return(x[grep("^ENS",rownames(x)),])) # 57760 x 1213
data <- pblapply(data, function(x) {rownames(x) <- gsub("\\..*","",rownames(x)); return(x)})
# Sparsity
dropout <- sapply(data,function(x) zero_percent(x)) # 46%, 49%, 55%, 60%, 66%, 72%, 86%, 88%, 92%, 94%, 97%
# Replace with genes names, remove duplicates
genes <- ensembldb::select(EnsDb.Hsapiens.v79, keys=rownames(data[[1]]), keytype = "GENEID", columns = c("SYMBOL"))$SYMBOL
duplicates <- duplicated(genes)
data <- pblapply(data, function(x) {x <- x[!duplicates,];
rownames(x) <- unique(genes);
return(x)}) # 56029 x 1213
# Keep 10k HVG
data <- pblapply(data, function(x) {hvg <- names(sort(apply(x,1,var), decreasing = T)[1:1e4]); return(x[hvg,])})
# make the distance heatmap
pairwise_dist <- pblapply(data, function(x) dist(t(x)))
pairwise_dist_norm <- pblapply(pairwise_dist, function(x) x/max(x))
pblapply(pairwise_dist_norm, dist_heatmap)
## [[1]]

##
## [[2]]

##
## [[3]]

##
## [[4]]

##
## [[5]]

##
## [[6]]

##
## [[7]]

##
## [[8]]

##
## [[9]]

##
## [[10]]

##
## [[11]]

##
## [[12]]

# look at relative contrast
ratio <- pbsapply(pairwise_dist_norm, max_min_ratio)
rc <- pbsapply(pairwise_dist_norm, relative_contrast)
n_cell <- nrow(rc)
rc <- data.frame("Relative_contrast"=as.vector(rc),
"Dropout_rate"=rep(dropout_percent,each=n_cell))
ratio <- data.frame("Ratio"=as.vector(ratio),
"Dropout_rate"=rep(dropout_percent,each=n_cell))
ggplot(rc, aes(x=Relative_contrast, fill=factor(Dropout_rate))) +
geom_density(aes(group=Dropout_rate), alpha=0.5)

ggplot(ratio, aes(x=Ratio, fill=factor(Dropout_rate))) +
geom_density(aes(group=Dropout_rate), alpha=0.5)

nPC = 10
data_pca <- pblapply(X=data_out, FUN=function(x) read.table(file=x))
pca <- pblapply(data_pca, function(x) return(x[1:nPC,]))
# make the distance heatmap
pairwise_dist_pca <- pblapply(pca, function(x) dist(t(x)))
pairwise_dist_norm_pca <- pblapply(pairwise_dist_pca, function(x) x/max(x))
pblapply(pairwise_dist_norm_pca, dist_heatmap)
## [[1]]

##
## [[2]]
